# Finkel and Gehlbach, Reform and Rebellion in Weak States
# August 2018

library(tidyverse)

##########
## DATA ##
##########

## Load data

july14 <- read.csv("event_2014.07.12_.csv")
archive <- read.csv("archive_source.csv")
event <- merge(july14, archive, by="eventid")
uezd <- read.csv("uezd_coding.csv")
event <- merge(event,uezd, by="eventid")
rm(july14, archive, uezd)

## Sample selection

# Earlier (Estliand, Kurliand, Lifliand) and later (Bessarabia, Dagestan, 
# Erivan, Kutaisi, Tiflis) reform. The only events a) in one of these eight 
# provinces, and b) also in some other province are in both
# 55 (Lifliand, guberniya1) and 6 (Estliand, guberniya2).
event <- filter(event, !(event$guberniya1.x %in% 
                           c(3, 6, 38, 50, 52, 54, 55, 57)))

## Create action variables

a_indices <- c()
for (action_code in c(1:18,20:28,30,31,33:37,39)) {
  var_prefix <- case_when(
    # Refusals
    action_code %in% c(1:10, 34, 39) ~ "ar",
    # Thefts
    action_code %in% c(20:28, 30, 35, 36, 37) ~ "at",
    # Complaints
    action_code %in% c(11:18) ~ "ac",
    # Governance
    action_code %in% c(31, 33) ~ "ag")
  var_name <- paste0(var_prefix, action_code)
  event <- event %>%
    mutate(!!var_name := ifelse(peasantaction1 == action_code |
                                  peasantaction2 == action_code |
                                  peasantaction3 == action_code | 
                                  peasantaction4 == action_code |
                                  peasantaction5 == action_code, 1, NA))
  a_indices <- c(a_indices, var_name)
}
event[a_indices][is.na(event[a_indices])] <- 0 
# Three events with peasantaction == 9999, but only this with actual action
# ("Peasants refuse to sign contract b/c rumors of land being given to them")
event$ar9999 <- ifelse(event$eventid==3595, 1, 0)
a_indices <- c(a_indices, "ar9999")
# Do not include complaint to unknown (ac18): not obviously complaint to a 
# person, just coded when peasants were specific in what they wanted
a_indices <- a_indices[a_indices != "ac18"] 

# Broad definition of refusals
aR_indices <- paste0(rep("ar", 13), c(1:10, 34, 39, 9999))
# Narrow definition of refusals
ar_indices <- paste0(rep("ar", 3), c(3, 5, 7))

at_indices <- paste0(rep("at", 13), c(20:28, 30, 35, 36, 37))
ac_indices <- paste0(rep("ac", 7), c(11:17))
ag_indices <- paste0(rep("ag", 2), c(31, 33))

event$a <- apply(event[, a_indices], 1, max)
event$ar <- apply(event[, ar_indices], 1, max)
event$at <- apply(event[, at_indices], 1, max)
event$ac <- apply(event[, ac_indices], 1, max)
event$ag <- apply(event[, ag_indices], 1, max)

## Create cause variables

cause_indices <- c()
for (cause_code in c(1:24, 26:28)) {
  var_prefix <- case_when(
    # Landlord/peasant relations
    cause_code %in% c(1:9, 16:18, 22:24) ~ "cr",
    # Serf status
    cause_code %in% c(10:12) ~ "cs",
    # Liberation
    cause_code %in% c(13:15, 28) ~ "cl",
    # Estate
    cause_code %in% c(19:21, 26) ~ "ce",
    # Other
    cause_code %in% c(27) ~ "co")
  var_name <- paste0(var_prefix, cause_code)
  event <- event %>%
    mutate(!!var_name := ifelse(peasantcause1 == cause_code | 
                                  peasantcause2 == cause_code |
                                  peasantcause3 == cause_code |
                                  peasantcause4 == cause_code |
                                  peasantcause5 == cause_code, 1, NA))
  cause_indices <- c(cause_indices, var_name)
}
event[cause_indices][is.na(event[cause_indices])] <- 0 

cr_indices <- paste0(rep("cr", 15), c(1:9, 16:18, 22:24))
cs_indices <- paste0(rep("cs", 3), c(10:12))
cl_indices <- paste0(rep("cl", 4), c(13:15, 28))
ce_indices <- paste0(rep("ce", 4), c(19:21, 26))

event$cr <- apply(event[, cr_indices], 1, max)
event$cs <- apply(event[, cs_indices], 1, max)
event$cl <- apply(event[, cl_indices], 1, max)
event$ce <- apply(event[, ce_indices], 1, max)
event$co <- event$co27

## Serf vs. non-serf

# When both "serf" and non-"serf" (9, 11), include in each count; 
# exclude rebel detachments (7) and soliders (10)

# Include "unknown" (9999) as "serf" 
serf_rows <- event$peasanttype %in% c(3, 4, 8, 9, 11, 13, 9999)
other_rows <- event$peasanttype %in% c(1, 2, 5, 6, 9, 11, 12)
var_list <- c("ar", "at", "ac", "ag", "a", "cr", "cs", "cl", "ce", "co")
for (var in var_list) {
  var_serf <- paste0(var, ".serf")
  var_other <- paste0(var, ".other")
  event <- event %>%
    mutate(!!var_serf := 0,
           !!var_other := 0)
  if (var == "co") {
    event$co.serf[serf_rows] <- event$co[serf_rows]
    event$co.other[other_rows] <- event$co[other_rows]
  } else {
    var_indices <- eval(parse(text = paste0(var, "_indices")))
    event[serf_rows, var_serf] <- 
      apply(event[serf_rows, var_indices], 1, max)
    event[other_rows, var_other] <- 
      apply(event[other_rows, var_indices], 1, max)
  }
}

# Do not include "unknown" as serf
serf_rows_no9999 <- event$peasanttype %in% c(3, 4, 8, 9, 11, 13)
event$ar.serf.no9999 <- 0
event[serf_rows_no9999,"ar.serf.no9999"] <- 
  apply(event[serf_rows_no9999, ar_indices], 1, max)
event$at.serf.no9999 <- 0
event[serf_rows_no9999,"at.serf.no9999"] <- 
  apply(event[serf_rows_no9999, at_indices], 1, max)

## Events in which military response

event <- event %>%
  mutate(ar.serf.mil = ifelse(govtresponse1 %in% c(1,4) |
                                govtresponse2 %in% c(1,4),
                              ar.serf, 0),
         ar.other.mil = ifelse(govtresponse1 %in% c(1,4) |
                                 govtresponse2 %in% c(1,4),
                               ar.other, 0),
         at.serf.mil = ifelse(govtresponse1 %in% c(1,4) |
                                govtresponse2 %in% c(1,4),
                              at.serf, 0),
         at.other.mil = ifelse(govtresponse1 %in% c(1,4) |
                                 govtresponse2 %in% c(1,4),
                               at.other, 0),
         a.serf.mil = ifelse(govtresponse1 %in% c(1,4) |
                               govtresponse2 %in% c(1,4),
                             a.serf, 0))

## Events in tsgaor

event <- event %>%
  mutate(tsgaor = ifelse(is.na(tsgaor), 0, tsgaor),
         ar.serf.tsgaor = ar.serf*tsgaor,
         ar.other.tsgaor = ar.other*tsgaor,
         at.serf.tsgaor = at.serf*tsgaor,
         at.other.tsgaor = at.other*tsgaor)

## Large events

event <- event %>%
  mutate(large = ifelse((numvillage > 1 & numvillage < 9999) | 
                          (numuyezd > 1 & numuyezd < 9999), 1, 0),
         ar.serf.large = ar.serf*large,
         ar.other.large = ar.other*large,
         at.serf.large = at.serf*large,
         at.other.large = at.other*large,
         a.serf.large = a.serf*large)

## Period

event <- event %>%
  mutate(period = case_when(startyear < 1861 ~ "Pre",
                            startyear %in% c(1861,1862) ~ "Transition",
                            startyear > 1862 ~ "Post"),
         period = factor(period, levels = c("Pre", "Transition", "Post")))

## Clean up

rm(list=ls(pattern="indices"))
rm(list=ls(pattern="rows"))
rm(list=ls(pattern="var"))
rm(list=ls(pattern="code"))

#############
## FIGURES ##
#############

library(grid)
library(gridExtra)

## Yearly dynamics

event.yearly <- event %>% 
  select(c("startyear", "ar.serf", "at.serf")) %>%
  group_by(startyear) %>%
  summarise_all(funs(sum)) %>%
  gather(v, value, -c("startyear")) %>% 
  separate(v, c("action", "col")) %>% 
  arrange(startyear) %>% 
  spread(col, value) %>%
  mutate(action = case_when(
    action=="ar" ~ "Refusals",
    action=="at" ~ "Thefts and Violence"
  ),
  action = factor(action, levels = c("Refusals", "Thefts and Violence"))) %>%
  group_by(startyear, action) %>%
  summarise_all(funs(mean))

event.yearly %>% ggplot(aes(x=startyear, y=serf)) +
  geom_line(size = 0.75) + facet_wrap(~action, ncol = 2) + 
  xlab("") + ylab("Number of events") +
  theme_bw() + ggtitle("(Former) serfs") +
  theme(panel.grid.minor=element_blank(),
        panel.grid.major.x=element_blank(),
        strip.text = element_text(size=10),
        panel.spacing = unit(.8, "lines"),
        plot.title = element_text(hjust = 0.5))
ggsave("yearly_serf.pdf")

event.yearly <- event %>% 
  select(c("startyear", "ar.other", "at.other")) %>%
  group_by(startyear) %>%
  summarise_all(funs(sum)) %>%
  gather(v, value, -c("startyear")) %>% 
  separate(v, c("action", "col")) %>% 
  arrange(startyear) %>% 
  spread(col, value) %>%
  mutate(action = case_when(
    action=="ar" ~ "Refusals",
    action=="at" ~ "Thefts and Violence"
  ),
  action = factor(action, levels = c("Refusals", "Thefts and Violence"))) %>%
  group_by(startyear, action) %>%
  summarise_all(funs(mean))

event.yearly %>% ggplot(aes(x=startyear, y=other)) +
  geom_line(size = 0.75) + facet_wrap(~action, ncol = 2) + 
  xlab("") + ylab("Number of events") +
  theme_bw() + ylim(0,423) + ggtitle("State and appanage peasants") +
  theme(panel.grid.minor=element_blank(),
        panel.grid.major.x=element_blank(),
        strip.text = element_text(size=10),
        panel.spacing = unit(.8, "lines"),
        plot.title = element_text(hjust = 0.5))
ggsave("yearly_other.pdf")

## Causes

event.cause <- event %>% 
  filter(ar == 1 | at == 1) %>%
  select(c("startyear", "period", 
           "cr.serf", "cs.serf", "cl.serf",
           "ce.serf", "co.serf")) %>%
  group_by(period, startyear) %>%
  summarise_all(funs(sum)) %>%
  ungroup() %>%
  gather(v, value, -c("period", "startyear")) %>% 
  separate(v, c("cause", "col")) %>% 
  arrange(period) %>% 
  spread(col, value) %>%
  mutate(cause = case_when(
    cause=="cr" ~ "L/P Relations",
    cause=="cs" ~ "Serf Status",
    cause=="cl" ~ "Liberation",
    cause=="ce" ~ "Estate",
    cause=="co" ~ "Other"
  ),
  cause = factor(cause, levels = c("Other", "Estate", "Liberation",
                                   "Serf Status", "L/P Relations"))) %>%
  select(-c(startyear)) %>%
  group_by(period, cause) %>%
  summarise_all(funs(mean))

event.cause %>% ggplot(aes(x=period, y=serf, fill=cause)) +
  geom_bar(aes(fill=cause), stat = "identity") +
  scale_fill_manual(values=gray.colors(5)) +
  xlab("") + ylab("Events per year") +
  theme_bw() +
  theme(legend.title=element_blank(),
        panel.grid.minor.x=element_blank(),
        panel.grid.major.x=element_blank())
ggsave("cause.pdf")

## Monthy dynamics

event.monthly <- event %>% filter(startyear > 1859 & 
                                    startyear < 1864 & startmonth <= 12 & 
                                    (ar == 1 | at == 1)) %>%
  mutate(startmonth = ifelse(startmonth < 10, 
                             paste0("0", startmonth),
                             paste0(startmonth)),
         startmonth = paste0(startyear, "-", startmonth, "-01")) %>%
  select(c("startmonth", "a.serf", "ar1")) %>%
  group_by(startmonth) %>%
  summarise_all(funs(sum)) %>%
  mutate(startmonth = as.Date(startmonth, "%Y-%m-%d"))

# Refusals + theft and violence, 1860 to 1863
break.vec <- c(seq(from=as.Date("1860-01-01"), 
                   to=as.Date("1863-12-01"), by="6 months"))
ggplot(event.monthly, aes(x=startmonth, y=a.serf)) + 
  geom_line() + geom_point()  +
  scale_x_date(breaks=break.vec, date_labels = "%b %Y",expand=c(.02,0)) +
  xlab("") + ylab("Number of events") + 
  theme_bw() + theme(axis.text.x = element_text(angle = 45, hjust = 1),
                     panel.grid.minor.x=element_blank(),
                     panel.grid.major.x=element_blank())
ggsave("monthly.pdf")

# Rejection of terms of emancipation, 1861 to mid-1863
break.vec <- c(seq(from=as.Date("1861-01-01"), 
                   to=as.Date("1863-03-01"), by="6 months"))
ggplot(event.monthly, aes(x=startmonth, y=ar1)) + 
  geom_line() + geom_point()  +
  scale_x_date(limits= c(as.Date("1861-01-01"),as.Date("1863-06-01")),
               breaks=break.vec, date_labels = "%b %Y",expand=c(.02,0)) +
  xlab("") + ylab("Number of events (terms of emancipation)") + 
  theme_bw() + theme(axis.text.x = element_text(angle = 45, hjust = 1),
                     panel.grid.minor=element_blank(),
                     panel.grid.major.x=element_blank())
ggsave("terms.pdf")

#Reported in footnote: any rejection of terms of liberation
event.monthly <- event %>% filter(startyear > 1859 & 
                                    startyear < 1864 & startmonth <= 12) %>%
  mutate(startmonth = ifelse(startmonth < 10, 
                             paste0("0", startmonth),
                             paste0(startmonth)),
         startmonth = paste0(startyear, "-", startmonth, "-01")) %>%
  select(c("startmonth", "a.serf", "ar1")) %>%
  group_by(startmonth) %>%
  summarise_all(funs(sum)) %>%
  mutate(startmonth = as.Date(startmonth, "%Y-%m-%d"))
ggplot(event.monthly, aes(x=startmonth, y=ar1)) + 
  geom_line() + geom_point()  +
  scale_x_date(limits= c(as.Date("1861-01-01"),as.Date("1863-06-01")),
               breaks=break.vec, date_labels = "%b %Y",expand=c(.02,0)) +
  xlab("") + ylab("Number of events (terms of emancipation)") + 
  theme_bw() + theme(axis.text.x = element_text(angle = 45, hjust = 1),
                     panel.grid.minor=element_blank(),
                     panel.grid.major.x=element_blank())

## Events with military response

event %>% group_by(startyear) %>%
  filter(ar == 1 | at == 1) %>%
  summarise(a.serf = sum(a.serf),
            a.serf.mil = sum(a.serf.mil)) %>%
  ggplot(aes(x=startyear,y=a.serf.mil/a.serf)) +
  geom_point() + geom_smooth(color = "black", se=F, span=0.8) +
  labs(caption = "bandwidth = .8", x = "",
       y = "Proportion of events provoking
       military response") +
  theme_bw() +
  theme(panel.grid.minor.x=element_blank(),
        panel.grid.major.x=element_blank())
ggsave("military.pdf")

rm(event.yearly, event.cause, event.monthly, break.vec)

###############
## MORE DATA ##
###############

## Stack events in multiple districts

masteridcols <- names(event[,grep("masterid", names(event))])
masteridcols <- masteridcols[!(masteridcols %in% c("masterid_2_3", 
                                                   "masterid_3_1"))]
left <- which(names(event) == "ar")
right <- which(names(event) == "a.serf.large")
varcols <- names(event)[left:right]
event <- event[,c("eventid", "startmonth", "startyear", masteridcols, varcols, 
                  "ar1")]

event <- gather(event, source, masterid, masteridcols)
event$source <- NULL
event <- event[!is.na(event$masterid), ]

rm(masteridcols, left, right, varcols)

## Location issues

# NAME CHANGES (only): archivists seem to be using both names, 
# population data below recorded under later name
# Move events in Iasski (Bessarabia) to Beletskii (Bessarabia); 
# changed name to Beletskii in 1887
event$masterid[event$masterid==26] <- 21
# Move events in Dinaburgskii (Vitebsk) to Dvinskii (Vitebsk);
# changed name to Dvinskii in 1893
event$masterid[event$masterid==460] <- 452

# BORDER CHANGES
# Move events in Surazhskii (Vitebsk) to Vitebskii (Vitebsk); 
# Surazhskii merged into Vitebskii in 1866
# Below aggregate population in two districts
event$masterid[event$masterid==461] <- 449

# CHANGE IN APSR PAPER NOT IMPLEMENTED HERE: want proper subordination 
# for FEs estimation, have population datafor masterid == 50, events 
# recorded for masterid == 50 only
# Move events in Rostovskii (Ekaterinoslav') to Rostovskii (Don)
# event$masterid[event$masterid==50] <- 513

## Collapse data into district-year, create balanced panel

event_it <- event %>% 
  select(-c(eventid, startmonth)) %>% 
  group_by(masterid, startyear) %>%
  summarise_all(funs(sum))  

uezdlist <- read.csv("UezdJoin.csv")
uezdlist <- filter(uezdlist, !(province_name %in% c("Estliand", "Kurliand", 
                                                   "Lifliand", "Bessarabia")))

# Fix Rostovskii (see above)
uezdlist$province_name[uezdlist$masterid==50] <- "Ekaterinoslav'"
uezdlist$gis_id[uezdlist$masterid==50] <- 563
uezdlist <- uezdlist[uezdlist$masterid!=513, ]

# Not in sample (masterid == 10 is Novaiia zemlia)
uezdlist <- uezdlist[!is.na(uezdlist$gis_id), ]
uezdlist <- uezdlist[uezdlist$masterid!=10, ]

uezdlist.temp <- uezdlist %>% 
  group_by(masterid) %>% 
  expand(startyear = seq(1851, 1871, 1)) %>% 
  ungroup() %>% 
  arrange(masterid, startyear)
uezdlist <- left_join(uezdlist.temp, uezdlist)
uezdyear <- full_join(event_it, uezdlist)
rm(uezdlist, uezdlist.temp)
uezdyear[is.na(uezdyear)] <- 0
uezdyear <- arrange(uezdyear, masterid, startyear)

## Soil

soil <- read.csv("soils.csv")
# Fix Rostovskii (see above)
soil$masterid[soil$masterid==513] <- 50
soil <- soil %>%
  mutate(area = area/1000000,
         area = ifelse(SU_SYM90 %in% 
                         c("HD", "NI", "UR", "WR", ""), 0, area)) %>%
  group_by(masterid) %>% mutate(uezd_area = sum(area)) %>%
  mutate(pctsoil = (area/uezd_area)*100) %>%
  group_by(masterid, SU_SYM90) %>% 
  summarise(pctsoil = sum(pctsoil))
# Convert tibble back to data frame
soil <- data.frame(soil)
soil <- soil[soil$pctsoil!= 0, ]
soil <- reshape(soil, idvar="masterid", v.names="pctsoil",
                timevar="SU_SYM90", direction="wide")
soil[is.na(soil)] <- 0
uezdyear <- merge(uezdyear, soil, by="masterid")
uezdyear$goodsoil <- 
  rowSums(uezdyear[ , c("pctsoil.CHh", "pctsoil.CHk", "pctsoil.CHl", 
                        "pctsoil.CHw", "pctsoil.GRh", "pctsoil.HSf", 
                        "pctsoil.HSi", "pctsoil.HSl", "pctsoil.HSs", 
                        "pctsoil.KSh", "pctsoil.KSk", "pctsoil.KSl", 
                        "pctsoil.PHc", "pctsoil.PHg", "pctsoil.PHh", 
                        "pctsoil.PHl")])/100
uezdyear$blacksoil <- 
  rowSums(uezdyear[ , c("pctsoil.CHh", "pctsoil.CHk", 
                        "pctsoil.CHl", "pctsoil.CHw")])/100
rm(soil)

## Rye

identifiers <- read_csv("identifiers.csv")
rye <- read_csv("rye.csv")
rye <- left_join(rye, identifiers)
rye <- rye %>% select(c(masterid, startyear, rye))
uezdyear <- left_join(uezdyear, rye) %>%
  mutate(rye = rye/100)
rm(identifiers,rye)

## Serf population

district <- read.csv("UezdData_Aug2018.csv")
district <- district %>% 
  select(c(masterid, MaleSerfs_1857, FemaleSerfs_1857))

# Merge the serf population (MaleSerfs_1857 and FemaleSerfs_1857) 
# for Surazhskii and Vitebskii districts
district[district$masterid==449,]$MaleSerfs_1857 <- 
  district[district$masterid==449,]$MaleSerfs_1857 +
  district[district$masterid==461,]$MaleSerfs_1857
district[district$masterid==449,]$FemaleSerfs_1857 <- 
  district[district$masterid==449,]$FemaleSerfs_1857 +
  district[district$masterid==461,]$FemaleSerfs_1857

uezdyear <- left_join(uezdyear,district)
rm(district)

## Binary outcomes and period

years <- unique(uezdyear$startyear)
uezdyear <- uezdyear %>%
  mutate(year = factor(startyear, levels = c("1860", years[years!="1860"])),
         period = case_when(startyear < 1861 ~ "Pre",
                            startyear %in% c(1861,1862) ~ "Transition",
                            startyear > 1862 ~ "Post"),
         period = factor(period, levels = c("Pre", "Transition", "Post")),
         any.serf = ifelse(a.serf > 0, 1, 0),
         any.other = ifelse(a.other > 0, 1, 0),
         any.refusals.serf = ifelse(ar.serf > 0, 1, 0),
         any.refusals.other = ifelse(ar.other > 0, 1, 0),
         any.thefts.serf = ifelse(at.serf > 0, 1, 0),
         any.thefts.other = ifelse(at.other > 0, 1, 0))

## Rejection of terms of liberation

ar1_count <- uezdyear %>%
  filter(period == "Transition") %>%
  filter(ar >= 1 | at >= 1) %>%
  group_by(masterid) %>%
  summarise(ar1_transition_count = sum(ar1))

ar1_count <- left_join(data.frame(masterid = unique(uezdyear$masterid)), 
                       ar1_count) %>%
  mutate(ar1_transition_count = ifelse(is.na(ar1_transition_count), 
                                       0, ar1_transition_count))

##############
## ANALYSIS ##
##############

library(plm)
library(clubSandwich)
library(survival)
library(stargazer)
library(margins)
library(xtable)
library(rgdal)
library(spdep)
library(splm)

## Data frame and sample construction

p.uezdyear <- pdata.frame(uezdyear, index = c("masterid", "startyear"))
provinces <- unique(uezdyear$province_name)
western_provinces <- c("Vilno", "Kovno", "Grodnno", "Minsk", "Mogilev", 
                       "Vitebsk", "Kiev", "Podol'sk", "Volyna")

# Select variables for cross-sectional regression
# Only data from 1861-1862
district_data <- uezdyear %>%
  filter(period=="Transition") %>%
  select(c(masterid, goodsoil, MaleSerfs_1857, FemaleSerfs_1857, 
           province_name)) %>%
  group_by(masterid) %>%
  slice(1) %>%
  ungroup()
district_data <- left_join(district_data, ar1_count)
district_data <- district_data %>%
  mutate(ar1_dum = ifelse(ar1_transition_count > 0, 1, 0),
         Serfs_1857 = (MaleSerfs_1857 + FemaleSerfs_1857) / 100000)

# Remove districts with missing values
district_data <- district_data[complete.cases(district_data),]

## Generic function for running panel regressions

model_run <- function(reg.formula, data.sample, model.type = "linear") {
  if (data.sample == "no western") {
    data.set <- uezdyear %>% filter(!(province_name %in% western_provinces))
  } else if (data.sample == "1858-1862") {
    data.set <- uezdyear %>% filter(startyear %in% 1858:1862)
  } else if (data.sample %in% provinces) {
    data.set <- uezdyear %>% filter(province_name != data.sample)
  } else if (data.sample == "balanced for rye") {
    data.set <- uezdyear.bal.rye
  } else {
    data.set <- uezdyear
  }
  
  # run standard plm for linear models
  if (model.type == "linear") {
    p.data.set <- pdata.frame(data.set, index = c("masterid", "startyear"))
    model <- plm(as.formula(reg.formula), 
                           data = p.data.set, model = "within")
    
    # For interactions, run lm with dummies (instead of plm) to feed into 
    # clubSandwich, because clubSandwich doesn't like interactions in plm 
    # for some reason
    if (grepl("\\*", reg.formula)) {
      lmod <- lm(as.formula(paste0(reg.formula, "+ factor(masterid)")), 
                 data = data.set)
      lmod.se <- sqrt(diag(vcovCR(lmod, type = "CR1S", 
                                  cluster=data.set$province_name))) 
      se <- lmod.se[!grepl("masterid", names(lmod.se))]
    } else {
      se <- sqrt(diag(vcovCR(model, type = "CR1S", 
                                       cluster=data.set$province_name)))
    }
    
    # Run glm with dummies for non-linear models
    # (could do pglm, but clubSandwich does not understand these model objects)
    # later dummies are omitted in stargazer
  } else if (model.type == "poisson") {
    model <- glm(as.formula(paste0(reg.formula, 
                                             " + factor(masterid)")), 
                           data = data.set, family = model.type)
    se <- sqrt(diag(vcovCR(model, type = "CR1S", 
                                     cluster=data.set$province_name))) 
  } else if (model.type == "binomial") {
    model <- glm(as.formula(paste0(reg.formula, 
                                             " + factor(masterid)")), 
                           data = data.set, family = model.type)
    se <- sqrt(diag(vcovCR(model, type = "CR1S", 
                                     cluster=data.set$province_name))) 
  } 
  return(list(model, se))
}

## Panel Regressions

formula.ar <- c("ar.serf ~ period", 
                "ar.serf.tsgaor ~ period", 
                "ar.serf ~ period", 
                "ar.serf.large ~ period",
                "any.refusals.serf ~ period",
                "ar.serf ~ period", 
                "ar.serf ~ period + rye", 
                "ar.other ~ period",
                "ar.other.tsgaor ~ period",
                "ar.other ~ period",
                "ar.other.large ~ period",
                "any.refusals.other ~ period",
                "ar.other ~ period",
                "ar.other ~ period + rye")
formula.at <- gsub("ar\\.", "at.", formula.ar)
formula.at <- gsub("refusals", "thefts", formula.at)
# List of sample restrictions for different models
data.sets <- rep(c("full", "full", "1858-1862", "full", "full", "no western", 
                   "full"), 2) 

# Run through formulas and samples simultaneously
results.ar <- map2(formula.ar, data.sets, model_run)
results.at <- map2(formula.at, data.sets, model_run)

# Balanced panel for rye
# Removing districts which have missing rye data in any of the years
missing.rye <- 
  unique(as.character(uezdyear$masterid)
         [!complete.cases(uezdyear[, c("ar.serf", "period", "rye")])])
uezdyear.bal.rye <- 
  uezdyear[!(as.character(uezdyear$masterid) %in% missing.rye),]
formula.rye <- c("ar.serf ~ period + rye", "ar.other ~ period + rye",
                 "at.serf ~ period + rye", "at.other ~ period + rye")
data.sets.rye <- rep("balanced for rye", 4)
results.rye <- map2(formula.rye, data.sets.rye, model_run)

# Print results for balanced panel
captions <- c("Refusals Serf", "Refusals Other", "Thefts Serf", "Thefts Other")
captions <- paste0(captions, " - Balanced Panel (for Rye)")
for (i in 1:length(results.rye)) {
  print(captions[i])
  print(results.rye[[i]])
}

# Regressions with .no9999
results.ar.no9999 <- model_run("ar.serf.no9999 ~ period", "full")
results.at.no9999 <- model_run("at.serf.no9999 ~ period", "full")
# Print results to console
results.ar.no9999
results.at.no9999

# Check changes in coefs when dropping provinces one at a time
province_check <- function(province) {
  new.model.ar <- model_run(formula.ar[1], province)
  new.model.at <- model_run(formula.at[1], province)
  transition.coef.ar <- new.model.ar[[1]][[1]]
  transition.coef.ar <- transition.coef.ar[names(transition.coef.ar) ==
                                             "periodTransition"]
  transition.coef.at <- new.model.at[[1]][[1]]
  transition.coef.at <- transition.coef.at[names(transition.coef.at) ==
                                             "periodTransition"]
  new.check = data.frame(province_dropped = province, ar.coef = 
                           transition.coef.ar,
                         at.coef = transition.coef.at)
  return(new.check)
}

# Record all coefficients on Transition in a data frame
province_dropping <- map_df(provinces, province_check)
# Print results to console
province_dropping

## Spatial panel models

# Create spatial weights for panel data
rusmap.panel <- readOGR(".", "1897Uezd")
# Fix Rostovskii masterid (see above)
rusmap.panel@data[rusmap.panel@data$masterid == 513, ]$masterid <- 50

# Merge district-level cross-sectional variables with the polygons object data
rusmap.panel <- merge(rusmap.panel, 
                      uezdyear %>% 
                        group_by(masterid) %>%
                        slice(1) %>%
                        ungroup() %>%
                        select(masterid), 
                      by="masterid", all.x = F)

# Inverse-distance matrix
nb.list.panel <- dnearneigh(coordinates(rusmap.panel), 0, 10000)
nb.dist.panel <- nbdists(nb.list.panel, coordinates(rusmap.panel))
inv.dist.panel <- map(nb.dist.panel, function(x) (1/(x^2)))
nb.inv.panel.w <- nb2listw(nb.list.panel, glist=inv.dist.panel, style="B")

# Function to run spatial panel models
spat_model_run <- function(dep.var) {
  reg.formula <- paste0(dep.var, " ~ period")
  model <- spml(formula = as.formula(reg.formula), data = p.uezdyear, 
                listw = nb.inv.panel.w,
                model = "within", effect = "individual", 
                lag = FALSE, spatial.error = "b")
  return(model)
}

spat.panel.dv <- c("ar.serf", "ar.other", "at.serf", "at.other")
spat.panels <- map(spat.panel.dv, spat_model_run) 

# Print results for spatial panels
captions <- c("Refusals Serf", "Refusals Other", "Thefts Serf", "Thefts Other")
for (i in 1:length(spat.panels)) {
  print(captions[i])
  print(summary(spat.panels[[i]]))
}

## Сross-sectional regressions

# Load shapefile
rusmap <- readOGR(".", "1897Uezd")
# Fix Rostovskii masterid (see above)
rusmap@data[rusmap@data$masterid == 513, ]$masterid <- 50

# Merge district-level cross-sectional variables with the polygons object data
rusmap <- merge(rusmap, district_data, by="masterid", all.x = F)

# Inverse-distance matrix
nb.list <- dnearneigh(coordinates(rusmap), 0, 10000)
nb.dist <- nbdists(nb.list, coordinates(rusmap))
inv.dist <- map(nb.dist, function(x) (1/(x^2)))
nb.inv.w <- nb2listw(nb.list, glist=inv.dist, style="B")

# OLS
ar1.fe <- lm(ar1_transition_count ~ goodsoil + Serfs_1857 +
               as.factor(province_name), data = district_data)
ar1.dum.fe <- lm(ar1_dum ~ goodsoil + Serfs_1857 +
                   as.factor(province_name), data = district_data)

# Spatial models
ar1.sp.inv <- errorsarlm(ar1_transition_count ~ goodsoil + 
                                      Serfs_1857 + as.factor(province_name), 
                           data = rusmap, nb.inv.w, zero.policy = T)
ar1.dum.sp.inv <- errorsarlm(ar1_dum ~ goodsoil + 
                                      Serfs_1857 + as.factor(province_name), 
                                    data = rusmap, nb.inv.w, zero.policy = T)

# Function to create a list of SEs for ar1 count
ar1_add_se <- function(model.obj) {
  if (class(model.obj)=="lm") {
    se <- sqrt(diag(vcovCR(model.obj, type = "CR1S", 
                          cluster=district_data$province_name)))
  } else {
    se <- model.obj$rest.se
    names(se) <- gsub("I\\(x - lambda \\* WX\\)", "", names(se))
  }
  return(se)
}

ar1.main.list <- list(ar1.dum.fe, ar1.dum.sp.inv, ar1.fe, ar1.sp.inv)
se.ar1.main.list <- map(ar1.main.list, ar1_add_se)
ar1.main <- list(ar1.main.list, se.ar1.main.list)

## Tables

# Reshape results for stargazer
reshape_results <- function(results.list) {
  model.list <- map(results.list, 1)
  se.list <- map(results.list, 2)
  return(list(model.list, se.list))
}

results.ar <- reshape_results(results.ar)
results.at <- reshape_results(results.at)

# Wrapper for stargazer
gen_tables <- function(fit.list, model.cols, model.group, 
                       dep.var.names = "",
                       cov.labels = "") {
  filename <- paste0(tolower(gsub(" ", "_", model.group)), ".tex")
  stargazer(fit.list[[1]][model.cols], se = fit.list[[2]][model.cols],
            dep.var.labels = "",
            omit = c("masterid", "province_name"),
            column.labels = dep.var.names,
            covariate.labels = cov.labels,
            model.names = F,
            title = model.group,
            out = filename,
            header = F,
            no.space = T,
            font.size = "scriptsize",
            keep.stat = c("n", "rsq", "ll"))
}

serf.var.names <- c("Serf", "Serf/TSGAOR", "Serf/1858-1862", 
                    "Serf/Large", "Serf/Binary", "Serf/No western", "Serf")
other.var.names <- gsub("Serf", "Other", serf.var.names)

gen_tables(results.ar, 1:7, 
           "Refusals Serf", 
           dep.var.names = serf.var.names,
           cov.labels = c("Transition", "Post", "Rye"))
gen_tables(results.ar, 8:14, 
           "Refusals Other", 
           dep.var.names = other.var.names,
           cov.labels = c("Transition", "Post", "Rye"))
gen_tables(results.at, 1:7, 
           "Thefts Serf", 
           dep.var.names = serf.var.names,
           cov.labels = c("Transition", "Post", "Rye"))
gen_tables(results.at, 8:14, 
           "Thefts Other", 
           dep.var.names = other.var.names,
           cov.labels = c("Transition", "Post", "Rye"))
gen_tables(ar1.main, 1:4, 
           "ar1", 
           dep.var.names = c("ar1 dum/province FE", "ar1 dum/spatial",
                             "ar1 count/province FE", "ar1 count/spatial"),
           cov.labels = c("Goodsoil", "Serf population, 1857"))

#########
## MAP ##
#########

library(latticeExtra)

# Load shapefile
rusmap <- readOGR(".", "1897Uezd")
# Fix Rostovskii masterid (see above)
rusmap@data[rusmap@data$masterid == 513, ]$masterid <- 50

# District-level data
district_map <- uezdyear %>%
  filter(period=="Transition") %>%
  select(c(masterid, goodsoil)) %>%
  group_by(masterid) %>%
  slice(1) %>%
  ungroup()
district_map <- left_join(district_map, ar1_count)
district_map <- district_map %>%
  mutate(ar1_dum = ifelse(ar1_transition_count > 0, 1, 0))
rusmap <- merge(rusmap, district_map, by="masterid", all.x = F)
c <- CRS("+proj=aea +lat_1=50 +lat_2=70 +lat_0=60 +lon_0=50 +x_0=0 +y_0=0 
         +ellps=WGS84 +datum=WGS84 +units=m +no_defs")
rusmap <- spTransform(rusmap, c)

# Map of goodsoil
pdf(file="map_goodsoil.pdf")
spplot(rusmap, "goodsoil", col.regions=gray.colors(32, 1, .3), 
       par.settings = list(axis.line = list(col = "transparent")),
       colorkey = list(axis.line = list(col = "black")),
       main=list(label="Fertile soil",cex=1))
dev.off()

# Map of ar1_dum
pdf(file="map_ar1_dum.pdf")
spplot(rusmap, "ar1_dum", col.regions=gray.colors(2, 1, .3), 
       par.settings = list(axis.line = list(col = "transparent")),
       colorkey = FALSE, 
       main=list(label="Unrest involving refusal to accept terms of liberation",
                 cex=1))
dev.off()

